clear all

local startyear = 2002
local endyear = 2018 
local IPEDSdir ../Data/IPEDS/Files // ~/Research/StudentDebt/Data/IPEDS

* Comment out this line if you do NOT want to try using the ``rv'' files first:
local rv = "rv_"

forvalues year = `startyear'/`endyear' {

	di "Processing `year'-`:di `year'+1'..."

	* Save some strings to match the various year naming conventions of the different forms
		local nextyear = `year'+1
		local year2 = substr("`year'",3,2)
		local nextyear2 = substr("`nextyear'",3,2)

	* IPEDS survey: Institutional characteristics
		* Title: Educational offerings, organization, services and athletic associations
		// cntlaffi: Institutional control or affiliation. 1=public, 2=for-profit, 3=non-profit secular, 4=non-profit religious.
		clear
		capture insheet using `IPEDSdir'/ic`year'_`rv'data_stata.csv
		if _rc ~= 0 insheet using `IPEDSdir'/ic`year'_data_stata.csv
		keep unitid cntlaffi 
		tempfile IC
		save `IC'
		* Title: Student charges for academic year programs
		// chg1ay3: Tuition and fees charged to full-time, first-time undergraduate students.
		clear
		capture insheet using `IPEDSdir'/ic`year'_ay_`rv'data_stata.csv
		if _rc ~= 0 insheet using `IPEDSdir'/ic`year'_ay_data_stata.csv
		keep unitid chg1ay3 
		tempfile IC_AY
		save `IC_AY'
		* Title: Directory information
		// opeid: identifier to merge with TitleIV. 
		// stabbr: state of location.
		clear
		capture insheet using `IPEDSdir'/hd`year'_`rv'data_stata.csv
		if _rc ~= 0 insheet using `IPEDSdir'/hd`year'_data_stata.csv
		isid unitid
		keep unitid instnm opeid stabbr 
		tostring opeid, replace
		replace opeid = "" if opeid == "-2" | opeid == " "
		replace opeid = "0" + opeid if opeid ~= "" & length(opeid) < 8
		replace opeid = "0" + opeid if opeid ~= "" & length(opeid) < 8
			count if length(opeid) ~= 8 & opeid ~= ""
			if r(N) > 0 exit 111
		tempfile HD
		save `HD'

	* IPEDS survey: Student financial aid and net price
		* Title: Student financial aid and net price
		// grn42: Number of "Group 4" students, meaning first-time full-time degree-seeking undergraduates who received financial aid
		// grn4n12: Of that group, the number who come from households with income $30k or below
		// grn4n22: Number who come from households with income $30k to $48k
		// We'll use a higher presence of low-income students as of 2010 to proxy for student credit constraints.
		// The figures aren't present before 2008, but we don't actually need them, so the code here essentially just creates an empty dataset for those years.
		clear
		capture insheet using `IPEDSdir'/sfa`year2'`nextyear2'_`rv'data_stata.csv
		if _rc ~= 0 insheet using `IPEDSdir'/sfa`year2'`nextyear2'_data_stata.csv
		isid unitid
		if `year' < 2008 keep unitid
		if `year' >= 2008 keep unitid grn4n2 grn4n12 grn4n22
		tempfile SFA
		save `SFA'

	* IPEDS survey: 12-month enrollment
		* Title: 12-month unduplicated headcount by race/ethnicity, gender and level of student
		// fyrace24 before 2008, efytotlt after 2008: Total number in the category defined by the effylev value above (e.g. 2 means undergraduates)
		// This survey is coded for *next* year because it looks backwards over the whole year
		clear
		capture insheet using `IPEDSdir'/effy`nextyear'_`rv'data_stata.csv
		if _rc ~= 0 insheet using `IPEDSdir'/effy`nextyear'_data_stata.csv
		keep if effylev == 2 // Undergraduate students
		isid unitid
		if `nextyear' < 2008 rename fyrace24 efytotlt
		keep unitid efytotlt
		tempfile EFFY
		save `EFFY'
		* 12-month instructional activity
		clear
		capture insheet using `IPEDSdir'/efia`nextyear'_`rv'data_stata.csv
		if _rc ~= 0 insheet using `IPEDSdir'/efia`nextyear'_data_stata.csv
		isid unitid
		* cdactua: credit hours of undergraduate instruction (used to define FTEs)
		keep unitid cdactua 
		tempfile EFIA
		save `EFIA'

	* IPEDS survey: Finance.

		* Title: Public institutions using GASB:
		* We will skip these, because we do not use them in our ultimate analysis anyway, and GASB accounting is very different in some ways from FASB.

		* Title: Private non-profit institutions, or public institutions using FASB
		// We pull a wide range of numbers related to balance sheet, revenues, and expenses, as reflected in the mnemonics below.
		clear
		capture insheet using `IPEDSdir'/f`year2'`nextyear2'_f2_`rv'data_stata.csv
		if _rc ~= 0 insheet using `IPEDSdir'/f`year2'`nextyear2'_f2_data_stata.csv
		isid unitid
		keep unitid f2a06 f2d* f2e*1

		rename f2a06 net_assets

		rename f2d01 tuition_fees
		rename f2d02 federal_appropriations
		rename f2d03 state_appropriations
		rename f2d04 local_appropriations
		rename f2d05 federal_grants_contracts
		rename f2d06 state_grants_contracts
		rename f2d07 local_grants_contracts
		rename f2d08 pvgifts_grants_contracts
		rename f2d09 affiliate_contributions
		rename f2d10 investment_return
		rename f2d11 educational_sales
		rename f2d12 auxent_revenue
		rename f2d13 hospital_revenue
		rename f2d14 independent_ops_revenue
		rename f2d15 other_revenue
		rename f2d16 total_revenues

		rename f2e011 instrl_expense
		rename f2e021 rsrch_expense
		rename f2e031 svc_expense
		rename f2e041 acsupp_expense
		rename f2e051 studentsvc_expense
		rename f2e061 instlsupp_expense
		rename f2e071 auxent_expense
		rename f2e081 netgrantaid_expense
		rename f2e091 hospsvc_expense
		rename f2e101 indops_expense
		if `year' <= 2014 rename f2e111 plant_expense // 14-15 is last year this is reported separately. In fact it is always zero in our sample.
		rename f2e121 other_expense
		rename f2e131 total_expenses

		* Aggregate some categories of revenues and expenses that are not reported seperately at for-profit colleges (see below)
		egen fedl_apgtcx = rowtotal(federal_appropriations federal_grants_contracts) // to match for-profit category f3d02
		egen statelocal_apgtcx = rowtotal(state_appropriations local_appropriations state_grants_contracts local_grants_contracts) // ...f3d03
		egen rsrch_svc_expense = rowtotal(rsrch_expense svc_expense) //  ...f3e02
		egen support_expense = rowtotal(acsupp_expense studentsvc_expense instlsupp_expense) // ...f3e03

		g FinanceCode = "F2"
		tempfile F2
		save `F2'

		* Title: Private for-profit institutions
		// Pull the same information as we did above for non-profit colleges
		clear
		capture insheet using `IPEDSdir'/f`year2'`nextyear2'_f3_`rv'data_stata.csv
		if _rc ~= 0 insheet using `IPEDSdir'/f`year2'`nextyear2'_f3_data_stata.csv
		isid unitid
		keep unitid f3*
		if `year' >= 2013 {
			foreach cat in 01 02a 02b 03a 03b 03c 04 06 07 10 {
				* Total amount for each category ends in 1:
				rename f3e`cat'1 f3e`cat' 
				* Separate entries f3e0*(2-7) break out the total amount by subcategories, but we don't need this level of detail.
			}
			if `year' == 2013 rename f3e111 f3e11 
			egen f3e02 = rowtotal(f3e02a f3e02b)
			egen f3e03 = rowtotal(f3e03a f3e03b f3e03c)
			rename f3e051 f3e05
		}

		keep unitid f3a03 f3d* f3e*

		rename f3a03 total_equity

		rename f3d01 tuition_fees
		rename f3d02 fedl_apgtcx
		rename f3d03 statelocal_apgtcx
		rename f3d04 pvgifts_grants_contracts
		rename f3d05 investment_return
		rename f3d06 educational_sales
		rename f3d07 auxent_revenue
		rename f3d08 other_revenue
		rename f3d09 total_revenues
		if `year' >= 2013 rename f3d12 hospital_revenue 

		rename f3e01 instrl_expense
		rename f3e02 rsrch_svc_expense
		rename f3e03 support_expense
		rename f3e04 auxent_expense
		rename f3e05 netgrantaid_expense
		rename f3e06 other_expense
		rename f3e07 total_expenses
		if `year' >= 2013 rename f3e10 hospsvc_expense 
		if `year' == 2013 rename f3e11 plant_expense 
		
		g FinanceCode = "F3"
		tempfile F3
		save `F3'

		use `F2', clear
		append using `F3'
		tempfile Finance
		save `Finance'
	
	clear
	use `IC'
	merge 1:1 unitid using `IC_AY', nogen 
	merge 1:1 unitid using `HD', nogen 
	merge 1:1 unitid using `SFA', nogen 
	merge 1:1 unitid using `EFFY', nogen 
	merge 1:1 unitid using `EFIA', nogen
	merge 1:1 unitid using `Finance', nogen 

	g awardyear = `year'
	tempfile `year'
	save ``year''

}

clear
forvalues year = `startyear'/`endyear' {
	append using ``year''
}

save ../Data/IPEDS/IPEDS, replace
